### clean state level public assistance data from US census
# use tidycensus and census API key to import data directly from census api
### last modified by: Charlotte Ambrozek
## last modified date: 3 May 2023
library(tidycensus)
library(tidyverse)
library(haven)

census_api_key("YOURAPIKEYHERE")

counties <- get_acs(geography = "county",
                        variables = c(snap_total = "C22001_001",
                                      snap_yes = "C22001_002",
                                      cashassist_total = "B19057_001",
                                      cashassist_yes = "B19057_002",
                                      cashorsnap_total = "B19058_001",
                                      cashorsnap_yes = "B19058_002",                                      
                                      medincome = "B19013_001",
                                      numinincpovratio = "B17002_001",
                                      incpovratiolt.5 = "B17002_002",
                                      incpovratio.5to.74 = "B17002_003",
                                      incpovratio.75to.99 = "B17002_004",
                                      incpovratio1to1.24 = "B17002_005",
                                      incpovratio1.25to1.49 = "B17002_006",
                                      incpovratio1.5to1.74 = "B17002_007",
                                      incpovratio1.75to1.84 = "B17002_008",
                                      incpovratio1.85to1.99 = "B17002_009"),
                        year = 2005,
                        survey = "acs1") %>%
  select(!c(moe)) %>%
  pivot_wider(id_cols = c("GEOID", "NAME"),
              names_from = variable,
              values_from = estimate) %>%
  mutate(year = 2005,
         ct_fips = GEOID,
         numpoor = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99,
         numltwic = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84,
         numlt2fpl = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84 + incpovratio1.85to1.99) %>% #note that i've already checked that the B and C variables result in the same estimates of poverty for the same ratio groups, so using one over the other is not a concern
  mutate(sharepoor = numpoor/numinincpovratio,
         shareltwic = numltwic/numinincpovratio,
         sharelt2fpl = numlt2fpl/numinincpovratio,
         sharesnap = snap_yes/snap_total,
         sharecash = cashassist_yes/cashassist_total,
         sharecashorsnap = cashorsnap_yes/cashorsnap_total) %>%
  select(c(year, ct_fips, medincome, sharepoor, shareltwic, sharelt2fpl, sharesnap, sharecash, sharecashorsnap))

for (y in 2006:2018){
  data <- get_acs(geography = "county",
                  variables = c(snap_total = "C22001_001",
                                snap_yes = "C22001_002",
                                cashassist_total = "B19057_001",
                                cashassist_yes = "B19057_002",
                                cashorsnap_total = "B19058_001",
                                cashorsnap_yes = "B19058_002",                                      
                                medincome = "B19013_001",
                                numinincpovratio = "B17002_001",
                                incpovratiolt.5 = "B17002_002",
                                incpovratio.5to.74 = "B17002_003",
                                incpovratio.75to.99 = "B17002_004",
                                incpovratio1to1.24 = "B17002_005",
                                incpovratio1.25to1.49 = "B17002_006",
                                incpovratio1.5to1.74 = "B17002_007",
                                incpovratio1.75to1.84 = "B17002_008",
                                incpovratio1.85to1.99 = "B17002_009"),
                  year = y,
                  survey = "acs1") %>%
    select(!c(moe)) %>%
    pivot_wider(id_cols = c("GEOID", "NAME"),
                names_from = variable,
                values_from = estimate) %>%
    mutate(year = y,
           ct_fips = GEOID,
           numpoor = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99,
           numltwic = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84,
           numlt2fpl = incpovratiolt.5 + incpovratio.5to.74 + incpovratio.75to.99 + incpovratio1to1.24 + incpovratio1.25to1.49 + incpovratio1.5to1.74 + incpovratio1.75to1.84 + incpovratio1.85to1.99) %>% #note that i've already checked that the B and C variables result in the same estimates of poverty for the same ratio groups, so using one over the other is not a concern
    mutate(sharepoor = numpoor/numinincpovratio,
           shareltwic = numltwic/numinincpovratio,
           sharelt2fpl = numlt2fpl/numinincpovratio,
           sharesnap = snap_yes/snap_total,
           sharecash = cashassist_yes/cashassist_total,
           sharecashorsnap = cashorsnap_yes/cashorsnap_total) %>%
    select(c(year, ct_fips, medincome, sharepoor, shareltwic, sharelt2fpl, sharesnap, sharecash, sharecashorsnap))
  
  counties <- full_join(counties, data)
}
save(counties, file = "./data/cleaned/countystats.rdata")
write_dta(counties, "./data/cleaned/countystats.dta")
